#Replication: The Impact of College Education on Fertility (Brand and Davis 2011)
#Tracey Shollenberger, Kristin Perkins
#To Upload May 7, 2012

#Stata do-file "HTE Stata_Upload.do" follows

setwd("/Users/Kristin/Dropbox/gov2001 replication/from Brand")
getwd()

#install.packages("SDMTools") #for weighted means and standard deviations
library(SDMTools)

#Load Stata data file for first treatment, college attendance, from Brand and Davis
library(foreign)
analysis19<-read.dta("analysis19data.dta")
head(analysis19)
dim(analysis19)

#these data cover restricted NLSY79 sample: 
# 1.women who were 14-17 years old at the baseline survey in 1979 (n=2736)
# 2.who had completed at least the 12th grade when they were 19 years old (n=2090)
# 3.who did not have missing data on college attainment (n=2013)


#################################
#No College attendance by age 19#
#################################

#limit data to control cases, no college attendance by age 19
#coll19enr is college attendance var
control19<-subset(analysis19, coll19enr==0, select=id:maedfaed)

#weighted means and standard deviations

X <- cbind(control19$black, control19$hisp, control19$i_bmmaed, control19$i_bmfaed, control19$i_parinc, control19$i_intact, control19$i_sibsz, control19$USborn, control19$i_farm, control19$i_south, control19$i_catholic, control19$i_jewish, control19$i_abil, control19$i_hsprog, control19$i_parencq, control19$i_frndplq, control19$i_mom18, control19$i_mom22)

tsamp19<-weighted.mean(analysis19$coll19enr, analysis19$weight, na.rm=FALSE)
csamp19<-1-tsamp19

cmean19 <- weighted.means <- c(apply(X, 2, FUN = function(x) weighted.mean(x, control19$weight, na.rm=FALSE)), weighted.mean(control19$children41, control19$weight, na.rm=TRUE), nrow(control19), csamp19)
cmean19[5] <- cmean19[5] * 100000
cmean19

X2 <- cbind(X, control19$children41)

csd19 <- weighted.sd <- c(apply(X2, 2, FUN = function(x) wt.sd(x, control19$weight)),0,0)
csd19[5] <- csd19[5] * 100000
csd19

################################
#re-do descriptives including fertility expectations
################################

#data file newanalysis19_n2013.dta is our Fertility_Plans.dta file merged with analysis19.dta from Brand and Davis
expect19<-read.dta("newanalysis19_n2013.dta")
dim(expect19)

#Brand and Davis use single imputation. We mimic that here by 1. running the Amelia package to impute missing values on the fertility plans variables; and 2.using only the first Amelia dataset. Detailed code available upon request. "expect19a1.dta" is our data file that inlcudes imputed values for the fertility plans variables.

expect19a1<-read.dta("expect19a1.dta")
dim(expect19a1)

expect19c<-subset(expect19a1, coll19enr==0, select=id:children41)
dim(expect19c)

Xec19<- cbind(expect19c$black, expect19c$hisp, expect19c$i_bmmaed, expect19c$i_bmfaed, expect19c$i_parinc, expect19c$i_intact, expect19c$i_sibsz, expect19c$USborn, expect19c$i_farm, expect19c$i_south, expect19c$i_catholic, expect19c$i_jewish, expect19c$i_abil, expect19c$i_hsprog, expect19c$i_parencq, expect19c$i_frndplq, expect19c$i_mom18, expect19c$i_mom22, expect19c$idealkid79, expect19c$desirekid79, expect19c$expectkid79, expect19c$diffkid, expect19c$children41)

Xetsamp19<-weighted.mean(expect19a1$coll19enr, expect19a1$weight, na.rm=FALSE)
Xecsamp19<-1-Xetsamp19

expect19cmean<-weighted.means<-c(apply(Xec19, 2, FUN=function(x) weighted.mean(x, expect19c$weight, na.rm=TRUE)), nrow(expect19c), Xecsamp19)
expect19cmean[5]<-expect19cmean[5]*100000
expect19cmean

expect19csd <- weighted.sd <- c(apply(Xec19, 2, FUN = function(x) wt.sd(x, expect19c$weight)),0,0)
expect19csd[5] <- expect19csd[5] * 100000
expect19csd

##############################
#College attendance by age 19#
##############################

#limit data to treatment cases, college attendance by age 19
#coll19enr is college attendance var
treatment19<-subset(analysis19, coll19enr==1, select=c(id, weight, coll19enr, USborn, black, hisp, i_bmmaed, i_bmfaed, i_parinc, i_intact, i_sibsz, i_farm, i_south, i_catholic, i_jewish, i_abil, i_hsprog, i_parencq, i_frndplq, i_mom18, i_mom22, children41))
dim(treatment19)

XT <- cbind(treatment19$black, treatment19$hisp, treatment19$i_bmmaed, treatment19$i_bmfaed, treatment19$i_parinc, treatment19$i_intact, treatment19$i_sibsz, treatment19$USborn, treatment19$i_farm, treatment19$i_south, treatment19$i_catholic, treatment19$i_jewish, treatment19$i_abil, treatment19$i_hsprog, treatment19$i_parencq, treatment19$i_frndplq, treatment19$i_mom18, treatment19$i_mom22)

tmean19 <- c(apply(XT, 2, FUN = function(x) weighted.mean(x, treatment19$weight, na.rm=FALSE)), weighted.mean(treatment19$children41, treatment19$weight, na.rm=TRUE), nrow(treatment19), tsamp19)
tmean19[5] <- tmean19[5] * 100000
tmean19

X2T <- cbind(XT, treatment19$children41)

tsd19 <- c(apply(X2T, 2, FUN = function(x) wt.sd(x, treatment19$weight)),0,0)
tsd19[5] <- tsd19[5] * 100000
tsd19

################################
#re-do descriptives including fertility expectations
################################

expect19t<-subset(expect19a1, coll19enr==1, select=id:children41)
dim(expect19t)

Xet19<- cbind(expect19t$black, expect19t$hisp, expect19t$i_bmmaed, expect19t$i_bmfaed, expect19t$i_parinc, expect19t$i_intact, expect19t$i_sibsz, expect19t$USborn, expect19t$i_farm, expect19t$i_south, expect19t$i_catholic, expect19t$i_jewish, expect19t$i_abil, expect19t$i_hsprog, expect19t$i_parencq, expect19t$i_frndplq, expect19t$i_mom18, expect19t$i_mom22, expect19t$idealkid79, expect19t$desirekid79, expect19t$expectkid79, expect19t$diffkid, expect19t$children41)

expect19tmean<-weighted.means<-c(apply(Xet19, 2, FUN=function(x) weighted.mean(x, expect19t$weight, na.rm=TRUE)), nrow(expect19t), Xetsamp19)
expect19tmean[5]<-expect19tmean[5]*100000
expect19tmean

expect19tsd <- weighted.sd <- c(apply(Xet19, 2, FUN = function(x) wt.sd(x, expect19t$weight)),0,0)
expect19tsd[5] <- expect19tsd[5] * 100000
expect19tsd

#################################
#No College completion by age 23#
#################################
analysis23<-read.dta("analysis23data.dta")
head(analysis23)
dim(analysis23)

#limit data to control cases, no college attendance by age 19
#coll19enr is college attendance var
control23<-subset(analysis23, coll23comp==0, select=id:isbsz2)

#weighted means and standard deviations

X <- cbind(control23$black, control23$hisp, control23$i_bmmaed, control23$i_bmfaed, control23$i_parinc, control23$i_intact, control23$i_sibsz, control23$USborn, control23$i_farm, control23$i_south, control23$i_catholic, control23$i_jewish, control23$i_abil, control23$i_hsprog, control23$i_parencq, control23$i_frndplq, control23$i_mom18, control23$i_mom22)

tsamp23<-weighted.mean(analysis23$coll23comp, analysis23$weight, na.rm=FALSE)
csamp23<-1-tsamp23

cmean23 <- weighted.means <- c(apply(X, 2, FUN = function(x) weighted.mean(x, control23$weight, na.rm=FALSE)), weighted.mean(control23$children41, control23$weight, na.rm=TRUE), nrow(control23), csamp23)
cmean23[5] <- cmean23[5] * 100000
cmean23

X2 <- cbind(X, control23$children41)

csd23 <- weighted.sd <- c(apply(X2, 2, FUN = function(x) wt.sd(x, control23$weight)),0,0)
csd23[5] <- csd23[5] * 100000
csd23

################################
#re-do descriptives including fertility expectations
################################

#data file newanalysis23_n2013.dta is our Fertility_Plans.dta file merged with analysis23.dta from Brand and Davis
expect23<-read.dta("newanalysis23_n2013.dta")
dim(expect23)

#See note on Amelia and imputations at line 58, above. Same procedure used for the college completion dataset. "expect23a1.dta" is our data file that inlcudes imputed values for the fertility plans variables.
expect23a1<-read.dta("expect23a1.dta")
dim(expect23a1)

expect23c<-subset(expect23a1, coll23comp==0, select=id:children41)
dim(expect23c)

Xec23<- cbind(expect23c$black, expect23c$hisp, expect23c$i_bmmaed, expect23c$i_bmfaed, expect23c$i_parinc, expect23c$i_intact, expect23c$i_sibsz, expect23c$USborn, expect23c$i_farm, expect23c$i_south, expect23c$i_catholic, expect23c$i_jewish, expect23c$i_abil, expect23c$i_hsprog, expect23c$i_parencq, expect23c$i_frndplq, expect23c$i_mom18, expect23c$i_mom22, expect23c$idealkid79, expect23c$desirekid79, expect23c$expectkid79, expect23c$diffkid, expect23c$children41)

Xetsamp23<-weighted.mean(expect23a1$coll23comp, expect23a1$weight, na.rm=FALSE)
Xecsamp23<-1-Xetsamp23

expect23cmean<-weighted.means<-c(apply(Xec23, 2, FUN=function(x) weighted.mean(x, expect23c$weight, na.rm=TRUE)), nrow(expect23c), Xecsamp23)
expect23cmean[5]<-expect23cmean[5]*100000
expect23cmean

expect23csd <- weighted.sd <- c(apply(Xec23, 2, FUN = function(x) wt.sd(x, expect23c$weight)),0,0)
expect23csd[5] <- expect23csd[5] * 100000
expect23csd

#################################
#College completion by age 23   #
#################################

#limit data to treatment cases, college attendance by age 19
#coll19enr is college attendance var

treatment23<-subset(analysis23, coll23comp==1, select=id:isbsz2)

XT <- cbind(treatment23$black, treatment23$hisp, treatment23$i_bmmaed, treatment23$i_bmfaed, treatment23$i_parinc, treatment23$i_intact, treatment23$i_sibsz, treatment23$USborn, treatment23$i_farm, treatment23$i_south, treatment23$i_catholic, treatment23$i_jewish, treatment23$i_abil, treatment23$i_hsprog, treatment23$i_parencq, treatment23$i_frndplq, treatment23$i_mom18, treatment23$i_mom22)

tmean23 <- c(apply(XT, 2, FUN = function(x) weighted.mean(x, treatment23$weight, na.rm=FALSE)), weighted.mean(treatment23$children41, treatment23$weight, na.rm=TRUE), nrow(treatment23), tsamp23)
tmean23[5] <- tmean23[5] * 100000
tmean23

X2T <- cbind(XT, treatment23$children41)

tsd23 <- weighted.sd.treat <- c(apply(X2T, 2, FUN = function(x) wt.sd(x, treatment23$weight)),0,0)
tsd23[5] <- tsd23[5] * 100000
tsd23

################################
#re-do descriptives including fertility expectations
################################

expect23t<-subset(expect23a1, coll23comp==1, select=id:children41)
dim(expect23t)


Xet23<- cbind(expect23t$black, expect23t$hisp, expect23t$i_bmmaed, expect23t$i_bmfaed, expect23t$i_parinc, expect23t$i_intact, expect23t$i_sibsz, expect23t$USborn, expect23t$i_farm, expect23t$i_south, expect23t$i_catholic, expect23t$i_jewish, expect23t$i_abil, expect23t$i_hsprog, expect23t$i_parencq, expect23t$i_frndplq, expect23t$i_mom18, expect23t$i_mom22, expect23t$idealkid79, expect23t$desirekid79, expect23t$expectkid79, expect23t$diffkid, expect23t$children41)

expect23tmean<-weighted.means<-c(apply(Xet23, 2, FUN=function(x) weighted.mean(x, expect23t$weight, na.rm=TRUE)), nrow(expect23t), Xetsamp23)
expect23tmean[5]<-expect23tmean[5]*100000
expect23tmean

expect23tsd <- weighted.sd <- c(apply(Xet23, 2, FUN = function(x) wt.sd(x, expect23t$weight)),0,0)
expect23tsd[5] <- expect23tsd[5] * 100000
expect23tsd


#####################
#Table 1            #
#####################

#Table1 replicated
Variables<-c("Black (0-1)", "Hispanic (0-1)", "Mother's education (years)", "Father's education (years)", "Parents' income (1979 dollars)", "Intact family age 14 (0-1)", "Number of siblings", "U.S.-born (0-1)", "Rural residence, age 14 (0-1)", "Southern residence, age 14 (0-1)", "Catholic (0-1)", "Jewish (0-1)", "Mental ability", "College-preparatory track (0-1)", "Parents' encouraged college (0-1)", "Friend's plans (years schooling)", "Had a child by age 18 (0-1)", "Had a child by age 22 (0-1)", "Number of children age 41", "Sample Size", "Weighted Sample Proportion")
Table1<-cbind(Variables, cmean19, csd19, tmean19, tsd19, cmean23, csd23, tmean23, tsd23)
#write.csv(Table1, file="Table1.csv")

#Table1 with fertility expectations
Variables<-c("Black (0-1)", "Hispanic (0-1)", "Mother's education (years)", "Father's education (years)", "Parents' income (1979 dollars)", "Intact family age 14 (0-1)", "Number of siblings", "U.S.-born (0-1)", "Rural residence, age 14 (0-1)", "Southern residence, age 14 (0-1)", "Catholic (0-1)", "Jewish (0-1)", "Mental ability", "College-preparatory track (0-1)", "Parents' encouraged college (0-1)", "Friend's plans (years schooling)", "Had a child by age 18 (0-1)", "Had a child by age 22 (0-1)", "Number of ideal kids", "Number of kids wanted", "Number of kids expected", "Difference between expected and realized", "Number of children age 41", "Sample Size", "Weighted Sample Proportion")
Table1.rev<-cbind(Variables, expect19cmean, expect19csd, expect19tmean, expect19tsd, expect23cmean, expect23csd, expect23tmean, expect23tsd)
#write.csv(Table1.rev, file="Table1rev.csv")

#####################
#Propensity scores  #
#####################

#install.packages("MatchIt")
library(MatchIt)
#install.packages("cem")
library(cem) 


########### College Attendance Models

dim(analysis19)

#Subset the data into variables we want

check19<-subset(analysis19, select=c(id, weight, coll19enr, USborn, black, hisp, i_bmmaed, i_bmfaed, i_parinc, i_intact, i_sibsz, i_farm, i_south, i_catholic, i_jewish, i_abil, i_hsprog, i_parencq, i_frndplq, i_mom18, i_mom22, children41))
dim(check19)

#use CEM to measure imbalance
imbalance<-imbalance(group=check19$coll19enr, data=check19, drop=c("children41", "coll19enr"))
imbalance
#totally unbalanced, L1 is 1!

#Probit model to generate propensity scores, regress treatment on explanatory vars

library(Zelig)
#install.packages("survey")
library(survey)

#weighted probit for Propensity Scores and coefficients for Table 2
attend.probit.out<-zelig(coll19enr ~ black + hisp + i_bmmaed + i_bmfaed + maedfaed + i_parinc + i_intact + i_sibsz + USborn + i_farm + i_south + i_catholic + i_jewish + i_abil + i_hsprog + i_parencq + i_frndplq + i_mom18 + good, model="probit.survey", weights=~weight, data=analysis19)
summary(attend.probit.out)

#For Wald test
#install.packages("aod")
library(aod)
wald.test(Sigma=vcov(attend.probit.out), b=coef(attend.probit.out), Terms=2:20)
#here we get X2=363.6, P=0.0; Brand/Davis get X2=370.59, P=.000

#Generate fitted values, estimating probability of assignment to treatment
PSest<-attend.probit.out$fitted.values

#Attach fitted values to the data
analysis19fit<-cbind(analysis19,PSest)

#look at graphs of treatment/control propensities - very different
par(mfrow=c(1,2))
hist(PSest[analysis19fit$coll19enr==1], xlab="Propensity Score", main="Treatment")
hist(PSest[analysis19fit$coll19enr==0], xlab="Propensity Score", main="Control")
dev.off()


#########re-estimate propensity score with expectkid79

maedfaed19<-subset(expect19, select=c(id, maedfaed, good))

expect19forps<-merge(expect19a1, maedfaed19, by="id")
dim(expect19forps)

attend.probit.expect<-zelig(coll19enr ~ black + hisp + i_bmmaed + i_bmfaed + maedfaed + i_parinc + i_intact + i_sibsz + USborn + i_farm + i_south + i_catholic + i_jewish + i_abil + i_hsprog + i_parencq + i_frndplq + i_mom18 + good + expectkid79, model="probit.survey", weights=~weight, data=expect19forps)
summary(attend.probit.expect)

#For Wald test
wald.test(Sigma=vcov(attend.probit.expect), b=coef(attend.probit.expect), Terms=2:20)
#here we get X2=363.7, P=0.0

#Generate fitted values, estimating probability of assignment to treatment
pscore.expect<-attend.probit.expect$fitted.values

#Attach fitted values to the data
expect19fit<-cbind(expect19forps,pscore.expect)

#look at graphs of treatment/control propensities - very different
par(mfrow=c(1,2))
hist(pscore.expect[expect19fit$coll19enr==1], xlab="Propensity Score", main="Treatment")
hist(pscore.expect[expect19fit$coll19enr==0], xlab="Propensity Score", main="Control")
dev.off()



########### College Completion Models

dim(analysis23)

check23<-subset(analysis23, select=c(id, weight, coll23comp, USborn, black, hisp, i_bmmaed, i_bmfaed, i_parinc, i_intact, i_sibsz, i_farm, i_south, i_catholic, i_jewish, i_abil, i_hsprog, i_parencq, i_frndplq, i_mom18, i_mom22, children41))
dim(check23)

#use CEM to measure imbalance
imbalance23<-imbalance(group=check23$coll23comp, data=check23, drop=c("children41", "coll23comp"))
imbalance23
#totally unbalanced, L1 is 0.999!

#skip right to weighted probit for Propensity Scores and coefficients for Table 2
complete.probit.out<-zelig(coll23comp ~ black + hisp + i_bmmaed + i_bmfaed + i_parinc + i_intact + i_sibsz + i_farm + i_south + i_catholic + i_jewish + i_abil + i_hsprog + i_parencq + i_frndplq + i_mom18 + i_mom22 + good, model="probit.survey", weights=~weight, data=analysis23)
summary(complete.probit.out)
#we get this warning, not sure if it matters: Warning message: In eval(expr, envir, enclos) : non-integer #successes in a binomial glm!

wald.test(Sigma=vcov(complete.probit.out), b=coef(complete.probit.out), Terms=2:19)
#we get X2=271.8, P=0.0, Brand/Davis get X2=278.74, P=.000

#Generate fitted values estimating propensity score
PScore<-complete.probit.out$fitted.values

#Attach fitted values to data
analysis23fit<-cbind(analysis23,PScore)

#look at graphs of treatment/control propensities - very different
par(mfrow=c(1,2))
hist(PScore[analysis23fit$coll23comp==1], xlab="Propensity Score", main="Treatment")
hist(PScore[analysis23fit$coll23comp==0], xlab="Propensity Score", main="Control")
dev.off()

#########re-estimate propensity score with expectkid79

good23<-subset(expect23, select=c(id, good))

expect23forps<-merge(expect23a1, good23, by="id")
dim(expect23forps)

complete.probit.expect<-zelig(coll23comp ~ black + hisp + i_bmmaed + i_bmfaed + i_parinc + i_intact + i_sibsz + i_farm + i_south + i_catholic + i_jewish + i_abil + i_hsprog + i_parencq + i_frndplq + i_mom18 + i_mom22 + good + expectkid79, model="probit.survey", weights=~weight, data=expect23forps)
summary(complete.probit.expect)

#For Wald test
wald.test(Sigma=vcov(complete.probit.expect), b=coef(complete.probit.expect), Terms=2:20)
#here we get X2=272.1, P=0.0

#Generate fitted values, estimating probability of assignment to treatment
c.pscore.expect<-complete.probit.expect$fitted.values

#Attach fitted values to the data
expect23fit<-cbind(expect23forps,c.pscore.expect)

#look at graphs of treatment/control propensities - very different
par(mfrow=c(1,2))
hist(c.pscore.expect[expect23fit$coll23comp==1], xlab="Propensity Score", main="Treatment")
hist(c.pscore.expect[expect23fit$coll23comp==0], xlab="Propensity Score", main="Control")
dev.off()


#####################
#Table 2            #
#####################

#Table 2 replicated
Variables2<-c("Constant","Black", "Hispanic", "Mother's Education", "Father's Education", "Mother's Education x Father's Education", "Parents' income (1979 $1,000s)", "Intact Family", "Number of Siblings", "U.S.-born", "Rural Residence", "Southern Residence", "Catholic", "Jewish", "Mental Ability", "College-preparatory Track", "Parents' Encouragement", "Friend's Plans", "Child by Age 18", "Child by Age 22", "Nonmissing on Covariates", "Wald X2", "P>X2")
CollegeAttendCoef<-coef(attend.probit.out)
ColAttend1to19<-cbind(CollegeAttendCoef[1:19])
ColAttend20<-cbind(CollegeAttendCoef[20])
waldattend<-363.6
px2attend<-0.0
t2c2<-rbind(ColAttend1to19,"",ColAttend20, waldattend, px2attend)

CollegeAttendSE<-sqrt(diag(vcov(attend.probit.out)))
case1to19<-cbind(CollegeAttendSE[1:19])
case20<-cbind(CollegeAttendSE[20])
t2c3<-rbind(case1to19,"",case20,"","")

cccoef<-coef(complete.probit.out)
cc1to5<-cbind(cccoef[1:5])
cc6to8<-cbind(cccoef[6:8])
cc9to19<-cbind(cccoef[9:19])
waldcomplete<-271.8
px2complete<-0.0

t2c4<-rbind(cc1to5,"",cc6to8,"",cc9to19, waldcomplete, px2complete)

ccse<-sqrt(diag(vcov(complete.probit.out)))
ccse1to5<-cbind(ccse[1:5])
ccse6to8<-cbind(ccse[6:8])
ccse9to19<-cbind(ccse[9:19])
t2c5<-rbind(ccse1to5,"",ccse6to8,"",ccse9to19,"","")

Table2<-cbind(Variables2, t2c2, t2c3, t2c4, t2c5)
#write.csv(Table2, file="Table2.csv")

#Table 2 with fertility expectations
Variables2expect<-c("Constant","Black", "Hispanic", "Mother's Education", "Father's Education", "Mother's Education x Father's Education", "Parents' income (1979 $1,000s)", "Intact Family", "Number of Siblings", "U.S.-born", "Rural Residence", "Southern Residence", "Catholic", "Jewish", "Mental Ability", "College-preparatory Track", "Parents' Encouragement", "Friend's Plans", "Child by Age 18", "Child by Age 22", "Nonmissing on Covariates", "Number of kids expected", "Wald X2", "P>X2")
CollegeAttendCoefexpect<-coef(attend.probit.expect)
ColAttend1to20ex<-cbind(CollegeAttendCoefexpect[1:20])
ColAttend21ex<-cbind(CollegeAttendCoefexpect[21])
waldattendex<-363.7
px2attendex<-0.0
t2c2expect<-rbind(ColAttend1to20ex,"",ColAttend21ex, waldattendex, px2attendex)

CollegeAttendSEex<-sqrt(diag(vcov(attend.probit.expect)))
case1to20ex<-cbind(CollegeAttendSEex[1:20])
case21ex<-cbind(CollegeAttendSEex[21])
t2c3expect<-rbind(case1to20ex,"",case21ex,"","")

cccoefex<-coef(complete.probit.expect)
cc1to5ex<-cbind(cccoefex[1:5])
cc6to8ex<-cbind(cccoefex[6:8])
cc9to20ex<-cbind(cccoefex[9:20])
waldcompleteex<-272.1
px2completeex<-0.0

t2c4ex<-rbind(cc1to5ex,"",cc6to8ex,"",cc9to20ex, waldcompleteex, px2completeex)

ccseex<-sqrt(diag(vcov(complete.probit.expect)))
ccse1to5ex<-cbind(ccseex[1:5])
ccse6to8ex<-cbind(ccseex[6:8])
ccse9to20ex<-cbind(ccseex[9:20])
t2c5ex<-rbind(ccse1to5ex,"",ccse6to8ex,"",ccse9to20ex,"","")

Table2rev<-cbind(Variables2expect, t2c2expect, t2c3expect, t2c4ex, t2c5ex)
#write.csv(Table2rev, file="Table2rev.csv")



#####################
#Poisson model      #
#####################

####College Attendance
attend.out<-zelig(children41 ~ coll19enr + PSest, model="poisson.survey", weights=~weight, data=analysis19fit)
summary(attend.out)

wald.test(Sigma=vcov(attend.out), b=coef(attend.out), Terms=2:3)


#re run with new pscore estimates from including expectkid79
attend.expect<-zelig(children41 ~ coll19enr + pscore.expect + expectkid79, model="poisson.survey", weights=~weight, data=expect19fit)
summary(attend.expect)

wald.test(Sigma=vcov(attend.expect), b=coef(attend.expect), Terms=2:3)


####College Completion
complete.out<-zelig(children41 ~ coll23comp + PScore, model="poisson.survey", weights=~weight, data=analysis23fit)
summary(complete.out)

wald.test(Sigma=vcov(complete.out), b=coef(complete.out), Terms=2:3)

#re run with new pscore estimates from including expectkid79
complete.expect<-zelig(children41 ~ coll23comp + c.pscore.expect + expectkid79, model="poisson.survey", weights=~weight, data=expect23fit)
summary(complete.expect)

wald.test(Sigma=vcov(complete.expect), b=coef(complete.expect), Terms=2:3)


#####################
#Table 3            # 
#####################

#Table 3 replicated
Variables3<-c("Constant", "College Attendance", "College Completion", "Propensity Score","Wald X2", "P>X2")

attendcoef<-coef(attend.out)
attend1to2<-cbind(attendcoef[1:2])
attend3<-attendcoef[3]
waldt3c2<-14.3
px2t3c2<-0.00078
t3c2<-rbind(attend1to2,"",attend3,waldt3c2,px2t3c2)

attendse<-sqrt(diag(vcov(attend.out)))
attendse1to2<-cbind(attendse[1:2])
attendse3<-attendse[3]
t3c3<-rbind(attendse1to2,"",attendse3,"","")

completecoef<-coef(complete.out)
complete1<-completecoef[1]
complete2to3<-cbind(completecoef[2:3])
waldt3c4<-20.0
px2t3c4<-0.0
t3c4<-rbind(complete1,"",complete2to3,waldt3c4,px2t3c4)

completese<-sqrt(diag(vcov(complete.out)))
completese1<-completese[1]
completese2to3<-cbind(completese[2:3])
t3c5<-rbind(completese1,"",completese2to3,"","")

Table3<-cbind(Variables3,t3c2,t3c3,t3c4,t3c5)
#write.csv(Table3, file="Table3.csv")

#Table 3 revised with expectation variable
Variables3ex<-c("Constant", "College Attendance", "College Completion", "Propensity Score", "Number of kids expected", "Wald X2", "P>X2")

attendcoefex<-coef(attend.expect)
attend1to2ex<-cbind(attendcoefex[1:2])
attend3to4ex<-cbind(attendcoefex[3:4])
waldt3c2ex<-16.3
px2t3c2ex<-0.00029
t3c2ex<-rbind(attend1to2ex,"",attend3to4ex,waldt3c2ex,px2t3c2ex)

attendseex<-sqrt(diag(vcov(attend.expect)))
attendse1to2ex<-cbind(attendseex[1:2])
attendse3to4ex<-cbind(attendseex[3:4])
t3c3ex<-rbind(attendse1to2ex,"",attendse3to4ex,"","")

completecoefex<-coef(complete.expect)
complete1ex<-completecoefex[1]
complete2to4ex<-cbind(completecoefex[2:4])
waldt3c4ex<-21.1
px2t3c4ex<-0.0
t3c4ex<-rbind(complete1ex,"",complete2to4ex,waldt3c4ex,px2t3c4ex)

completeseex<-sqrt(diag(vcov(complete.expect)))
completese1ex<-completeseex[1]
completese2to4ex<-cbind(completeseex[2:4])
t3c5ex<-rbind(completese1ex,"",completese2to4ex,"","")

Table3ex<-cbind(Variables3ex,t3c2ex,t3c3ex,t3c4ex,t3c5ex)
#write.csv(Table3ex, file="Table3rev.csv")


#########################
#Mean Covariate Balance #
#########################

#weighted means to check balance by propensity score strata, these are the original numbers from Brand & Davis

stratum1c19<-subset(analysis19fit, coll19enr==0 & PSest>0 & PSest<0.1, select=id:PSest)
stratum1t19<-subset(analysis19fit, coll19enr==1 & PSest>0 & PSest<0.1, select=id:PSest)

tab4X <- cbind(stratum1c19$black, stratum1c19$hisp, stratum1c19$i_bmmaed, stratum1c19$i_bmfaed, stratum1c19$i_parinc, stratum1c19$i_intact, stratum1c19$i_sibsz, stratum1c19$USborn, stratum1c19$i_farm, stratum1c19$i_south, stratum1c19$i_catholic, stratum1c19$i_jewish, stratum1c19$i_abil, stratum1c19$i_hsprog, stratum1c19$i_parencq, stratum1c19$i_frndplq, stratum1c19$i_mom18, stratum1c19$PSest)


ctab4mean19 <- weighted.means <- c(apply(tab4X, 2, FUN = function(x) weighted.mean(x, stratum1c19$weight, na.rm=FALSE)), nrow(stratum1c19))
ctab4mean19[5] <- ctab4mean19[5] * 100000
ctab4mean19

ctab4sd19 <- weighted.sd <- apply(tab4X, 2, FUN=function(x) wt.sd(x, stratum1c19$weight))
ctab4sd19

tab4Xt <- cbind(stratum1t19$black, stratum1t19$hisp, stratum1t19$i_bmmaed, stratum1t19$i_bmfaed, stratum1t19$i_parinc, stratum1t19$i_intact, stratum1t19$i_sibsz, stratum1t19$USborn, stratum1t19$i_farm, stratum1t19$i_south, stratum1t19$i_catholic, stratum1t19$i_jewish, stratum1t19$i_abil, stratum1t19$i_hsprog, stratum1t19$i_parencq, stratum1t19$i_frndplq, stratum1t19$i_mom18, stratum1t19$PSest)

ttab4mean19<-weighted.means<-c(apply(tab4Xt, 2, FUN=function(x) weighted.mean(x, stratum1t19$weight, na.rm=FALSE)), nrow(stratum1t19))
ttab4mean19[5]<-ttab4mean19[5] * 100000
ttab4mean19

ttab4sd19 <- weighted.sd <- apply(tab4Xt, 2, FUN=function(x) wt.sd(x, stratum1t19$weight))
ttab4sd19

#weighted means to check balance by propensity score strata, these include the expectation variable

ex.stratum1c19<-subset(expect19fit, coll19enr==0 & pscore.expect>0 & pscore.expect<0.1, select=id:pscore.expect)
ex.stratum1t19<-subset(expect19fit, coll19enr==1 & pscore.expect>0 & pscore.expect<0.1, select=id:pscore.expect)

ex.tab4X <- cbind(ex.stratum1c19$black, ex.stratum1c19$hisp, ex.stratum1c19$i_bmmaed, ex.stratum1c19$i_bmfaed, ex.stratum1c19$i_parinc, ex.stratum1c19$i_intact, ex.stratum1c19$i_sibsz, ex.stratum1c19$USborn, ex.stratum1c19$i_farm, ex.stratum1c19$i_south, ex.stratum1c19$i_catholic, ex.stratum1c19$i_jewish, ex.stratum1c19$i_abil, ex.stratum1c19$i_hsprog, ex.stratum1c19$i_parencq, ex.stratum1c19$i_frndplq, ex.stratum1c19$i_mom18, ex.stratum1c19$expectkid79, ex.stratum1c19$pscore.expect)


ex.ctab4mean19 <- weighted.means <- c(apply(ex.tab4X, 2, FUN = function(x) weighted.mean(x, ex.stratum1c19$weight, na.rm=FALSE)), nrow(ex.stratum1c19))
ex.ctab4mean19[5] <- ex.ctab4mean19[5] * 100000
ex.ctab4mean19

ex.ctab4sd19 <- weighted.sd <- apply(ex.tab4X, 2, FUN=function(x) wt.sd(x, ex.stratum1c19$weight))
ex.ctab4sd19

ex.tab4Xt <- cbind(ex.stratum1t19$black, ex.stratum1t19$hisp, ex.stratum1t19$i_bmmaed, ex.stratum1t19$i_bmfaed, ex.stratum1t19$i_parinc, ex.stratum1t19$i_intact, ex.stratum1t19$i_sibsz, ex.stratum1t19$USborn, ex.stratum1t19$i_farm, ex.stratum1t19$i_south, ex.stratum1t19$i_catholic, ex.stratum1t19$i_jewish, ex.stratum1t19$i_abil, ex.stratum1t19$i_hsprog, ex.stratum1t19$i_parencq, ex.stratum1t19$i_frndplq, ex.stratum1t19$i_mom18, ex.stratum1t19$expectkid79, ex.stratum1t19$pscore.expect)

ex.ttab4mean19<-weighted.means<-c(apply(ex.tab4Xt, 2, FUN=function(x) weighted.mean(x, ex.stratum1t19$weight, na.rm=FALSE)), nrow(ex.stratum1t19))
ex.ttab4mean19[5]<-ex.ttab4mean19[5] * 100000
ex.ttab4mean19

ex.ttab4sd19 <- weighted.sd <- apply(ex.tab4Xt, 2, FUN=function(x) wt.sd(x, ex.stratum1t19$weight))
ex.ttab4sd19

#####################
#Table 4            # 
#####################

Variables4expect<-c("Black", "Hispanic", "Mother's Education", "Father's Education",  "Parents' income/$1,000", "Intact Family", "Number of Siblings", "U.S.-born", "Rural Residence", "Southern Residence", "Catholic", "Jewish", "Mental Ability", "College-preparatory Track", "Parents' Encouragement", "Friend's Plans", "Child by Age 18", "Number of kids expected", "Propensity Score", "Sample Size")

ctab4mean19.1to17<-cbind(ctab4mean19[1:17])
ctab4mean19.18to19<-cbind(ctab4mean19[18:19])

ctab4mean19sp<-rbind(ctab4mean19.1to17,"",ctab4mean19.18to19)

ttab4mean19.1to17<-cbind(ttab4mean19[1:17])
ttab4mean19.18to19<-cbind(ttab4mean19[18:19])

ttab4mean19sp<-rbind(ttab4mean19.1to17,"",ttab4mean19.18to19)

ex.table4<-cbind(Variables4expect, ctab4mean19sp, ttab4mean19sp, ex.ctab4mean19, ex.ttab4mean19)
#write.csv(ex.table4, file="Table4rev.csv")


##########################
#Export Data for Figures # 
##########################

t5attend<-analysis19fit
attach(t5attend)
t5attend$stratum[PSest>0 & PSest<0.1]<-1
t5attend$stratum[PSest>0.1 & PSest<0.2]<-2
t5attend$stratum[PSest>0.2 & PSest<0.3]<-3
t5attend$stratum[PSest>0.3 & PSest<0.4]<-4
t5attend$stratum[PSest>0.4 & PSest<0.6]<-5
t5attend$stratum[PSest>0.6 & PSest<1]<-6
detach(t5attend)
#write.dta(t5attend, "analysis19block.dta")

t5complete<-analysis23fit
attach(t5complete)
t5complete$stratum[PScore>0 & PScore<0.05]<-1
t5complete$stratum[PScore>0.05 & PScore<0.1]<-2
t5complete$stratum[PScore>0.1 & PScore<0.2]<-3
t5complete$stratum[PScore>0.2 & PScore<0.4]<-4
t5complete$stratum[PScore>0.4 & PScore<0.6]<-5
t5complete$stratum[PScore>0.6 & PScore<1]<-6
detach(t5complete)
#write.dta(t5complete, "analysis23block.dta")




############################
#Multilevel Poisson (STATA)#     
############################

#write.dta(expect19fit, "expect19fit.dta")

#see Stata do-file "HTE Stata_Upload.do" for Table 5


